##########################################
# Replication Data for Proksch, Lowe, Wäckerle, Soroka. (2018). Multilingual Sentiment Analysis: A New Approach to Measuring Conflict in Legislative Speeches. Legislative Studies Quarterly, Forthcoming.
##########################################

#Part 4: Debate Sentiment and the Legislative Process in Germany
##########################
#Intro

# Session setup: clear the workspace, attach packages, and make the script's
# own folder the working directory so the relative file names below resolve.

# Removes every object visible in the current environment.
rm(list=ls())
library(readtext)
library(tidyverse)
library(quanteda)
library(stringr)
library(rstudioapi)
# require() attaches devtools if available; unlike library() it returns FALSE
# with a warning instead of stopping when the package is missing. devtools is
# only needed for the commented-out install_version() call below.
require(devtools)
#install_version("quanteda", version = "1.1.1", repos = "http://cran.us.r-project.org") #all analysis is run on quanteda version 1.1.1

# getActiveDocumentContext() comes from rstudioapi.
# NOTE(review): presumably this only works when the script is run from inside
# RStudio; confirm before running it any other way.
current_path <- getActiveDocumentContext()$path 
setwd(dirname(current_path ))

# Restore all input objects from the working directory, in this order.
# The code below relies on metadata_df16/17/18 and
# metadata_speakers_complete_df16/17/18 being available afterwards
# (presumably texts_joined and extendeddict_de_e come from the last two
# files; verify against the data package).
for (rdata_file in c(
  "4_16leg_metadata_speakers_complete_df_encoded.Rdata",
  "4_17leg_metadata_speakers_complete_df_encoded.Rdata",
  "4_18leg_metadata_speakers_complete_df_encoded.Rdata",
  "4_16leg_metadata_df.Rdata",
  "4_17leg_metadata_df.Rdata",
  "4_18leg_metadata_df.Rdata",
  "4_speeches_Germany.RData",
  "lsde_frenche_germane.RData"
)) {
  load(rdata_file)
}

# Stack the three bill-level metadata tables into one.
metadata_df <- rbind(metadata_df16, metadata_df17, metadata_df18)

# Build a speech key of the form "<Drucksachennummer>/<Speech_ID>" in each
# speaker-level table, then stack those tables as well.
metadata_speakers_complete_df16$index_complete <- with(
  metadata_speakers_complete_df16,
  paste0(Drucksachennummer, "/", Speech_ID)
)
metadata_speakers_complete_df17$index_complete <- with(
  metadata_speakers_complete_df17,
  paste0(Drucksachennummer, "/", Speech_ID)
)
metadata_speakers_complete_df18$index_complete <- with(
  metadata_speakers_complete_df18,
  paste0(Drucksachennummer, "/", Speech_ID)
)
metadata_speakers_complete_df <- rbind(
  metadata_speakers_complete_df16,
  metadata_speakers_complete_df17,
  metadata_speakers_complete_df18
)


#define bills proposed by the government
# Regierungsvorlage is 1 when the initiator is the Bundesregierung, or the
# CDU/CSU + SPD parliamentary groups jointly in periods 16 and 18, or the
# CDU/CSU + FDP parliamentary groups jointly in period 17. Every other row,
# including rows with a missing initiator, is 0.
texts_joined$Regierungsvorlage <- with(texts_joined, {
  from_cabinet <- Initiator == "Bundesregierung"
  from_cdu_spd <- Initiator == "Fraktion der CDU/CSU;Fraktion der SPD" &
    Legislative_period %in% c(16, 18)
  from_cdu_fdp <- Initiator == "Fraktion der CDU/CSU;Fraktion der FDP" &
    Legislative_period %in% c(17)
  # %in% TRUE turns an NA comparison result into FALSE, i.e. into a 0 flag
  as.numeric((from_cabinet | from_cdu_spd | from_cdu_fdp) %in% TRUE)
})

#extract voting info for budget bills
# Beschluss_neu starts as the whitespace-trimmed Beschluss text.
metadata_speakers_complete_df$Beschluss_neu <-
  str_trim(metadata_speakers_complete_df$Beschluss)

# A tally is three colon-separated integers. It is taken from Beschluss_neu
# where that is present and from Abstimmungsergebnis otherwise.
tally_regex <- "\\d+:\\d+:\\d+"
beschluss_missing <- is.na(metadata_speakers_complete_df$Beschluss_neu)
metadata_speakers_complete_df$Abstimmungsergebnis_clean <- NA
metadata_speakers_complete_df$Abstimmungsergebnis_clean[!beschluss_missing] <-
  str_extract(
    metadata_speakers_complete_df$Beschluss_neu[!beschluss_missing],
    tally_regex
  )
metadata_speakers_complete_df$Abstimmungsergebnis_clean[beschluss_missing] <-
  str_extract(
    metadata_speakers_complete_df$Abstimmungsergebnis[beschluss_missing],
    tally_regex
  )

# Strip word-punctuation-word tokens and digit+uppercase tokens from the text.
# The last step uses an empty pattern and leaves the strings unchanged.
# NOTE(review): possibly a character was lost from that pattern in encoding;
# confirm against the original script.
metadata_speakers_complete_df$Beschluss_neu <-
  metadata_speakers_complete_df$Beschluss_neu %>%
  gsub("(\\w+[[:punct:]]\\w+)", "", .) %>%
  gsub("(\\d+[[:upper:]]+)", "", .) %>%
  gsub("", "", .)

# Attach the recorded vote result of each reading to the bill-level metadata
# and flag unanimous votes.

# Keep one row per bill and reading: the first row for each
# "<Drucksachennummer>/<Reading>" key.
metadata_speakers_complete_df$druck_reading <- paste(
  metadata_speakers_complete_df$Drucksachennummer,
  metadata_speakers_complete_df$Reading,
  sep = "/"
)
voting_results <- metadata_speakers_complete_df %>%
  filter(!duplicated(druck_reading))

# Where no numeric tally was extracted, fall back to the raw result text.
tally_missing <- is.na(voting_results$Abstimmungsergebnis_clean)
voting_results$Abstimmungsergebnis_clean[tally_missing] <-
  voting_results$Abstimmungsergebnis[tally_missing]

# For every bill, look up the first non-missing result recorded for each
# reading. match() returns the first hit, so a bill with several candidate
# rows gets exactly one value; incomparables = NA keeps a missing
# Drucksachennummer from matching another missing one.
reading_labels <- c("first", "second", "third")
for (reading_no in seq_along(reading_labels)) {
  recorded <- voting_results[
    voting_results$Reading %in% reading_no &
      !is.na(voting_results$Abstimmungsergebnis_clean), ,
    drop = FALSE
  ]
  hit <- match(
    metadata_df$Drucksachennummer,
    recorded$Drucksachennummer,
    incomparables = NA
  )
  vote_col <- paste0("vote_", reading_labels[reading_no], "_reading")
  metadata_df[[vote_col]] <- recorded$Abstimmungsergebnis_clean[hit]
}

# A reading counts as unanimous when the result text contains "einstimmig"
# or a tally of the form "<n>:0:0". Bills without a recorded result get 0.
for (reading_label in reading_labels) {
  vote <- metadata_df[[paste0("vote_", reading_label, "_reading")]]
  metadata_df[[paste0("unanimity_", reading_label, "_reading")]] <-
    as.numeric(grepl("einstimmig", vote) | grepl("\\d+:0:0", vote))
}
metadata_df$unanimity_any_reading <- as.numeric(
  metadata_df$unanimity_first_reading == 1 |
    metadata_df$unanimity_second_reading == 1 |
    metadata_df$unanimity_third_reading == 1
)

#combine speeches with metadata into corpus
corp.germany <- corpus(texts_joined, text_field = "text")

# Count dictionary matches per speech with extendeddict_de_e and put the
# speech metadata next to the counts.
senti_germany_debates <- cbind(
  data.frame(dfm(corp.germany, dictionary = extendeddict_de_e)),
  texts_joined
)

# Speech-level sentiment: log ratio of positive to negative counts, each
# offset by 0.5.
senti_germany_debates$Sentiment <- log(
  (senti_germany_debates$pos + 0.5) / (senti_germany_debates$neg + 0.5)
)

# Speaker group: the party label by default, recoded to "Cabinet" or
# "external" by the pattern rules below. Rules are applied top to bottom and
# later rules overwrite earlier ones.
# NOTE(review): rule 1 is fully overwritten by rule 3 (same pattern), so a
# lower-case "minister" label ends up "external" unless one of the later
# Cabinet rules also matches. Confirm this is intended.
speaker_party <- senti_germany_debates$`Speaker party`
senti_germany_debates$group <- speaker_party
group_patterns <- c(
  "minister", "Minister", "minister", "Auswärtiges Amt", "Bundeskanzleramt",
  "Bundesministerium", "Senat", "Ministerpräsident"
)
group_values <- c(
  "Cabinet", "external", "external", "Cabinet", "Cabinet",
  "Cabinet", "external", "external"
)
for (rule_no in seq_along(group_patterns)) {
  senti_germany_debates$group[grepl(group_patterns[rule_no], speaker_party)] <-
    group_values[rule_no]
}
table(senti_germany_debates$group)

#get group level positive and negative words for each bill
# Sum the counts over all speeches of a group within one bill and reading.
pos_group_debate <- aggregate(
  pos ~ group + Drucksachennummer.x + Reading,
  data = senti_germany_debates,
  FUN = sum
)
neg_group_debate <- aggregate(
  neg ~ group + Drucksachennummer.x + Reading,
  data = senti_germany_debates,
  FUN = sum
)

#join and calculate sentiment
# The join uses the columns the two tables share. Sentiment is the log ratio
# of the summed counts, each offset by 0.5.
senti_group_parties <- pos_group_debate %>%
  left_join(neg_group_debate) %>%
  mutate(Sentiment = log((pos + 0.5) / (neg + 0.5))) %>%
  rename(Drucksachennummer = Drucksachennummer.x)

#join group level sentiment to bill level metadata, separately for each group and each reading
# For every group and reading this adds two columns to metadata_df,
# Pos_<label>_Reading<k> and Neg_<label>_Reading<k>, holding that group's
# positive and negative word counts for the bill. Bills without a matching
# row get NA. The join key is spelled out so the result does not depend on
# which other column names the two tables happen to share.

# names = label used in the new column names; values = `group` value to select
group_labels <- c(
  Cabinet = "Cabinet",
  CDU_CSU = "CDU/CSU",
  SPD = "SPD",
  FDP = "FDP",
  GRUENE = "BÜNDNIS 90/DIE GRÜNEN",
  LINKE = "DIE LINKE"
)
for (column_label in names(group_labels)) {
  group_value <- group_labels[[column_label]]
  for (reading_no in 1:3) {
    reading_counts <- senti_group_parties %>%
      filter(group == group_value & Reading == reading_no) %>%
      select(Drucksachennummer, pos, neg)
    # select() returned exactly these three columns in this order
    names(reading_counts) <- c(
      "Drucksachennummer",
      paste0(c("Pos_", "Neg_"), column_label, "_Reading", reading_no)
    )
    metadata_df <- left_join(metadata_df, reading_counts,
                             by = "Drucksachennummer")
  }
}

#Aggregate positive and negative words by government, prime minister, junior coalition partner and opposition status
# For every status group, polarity (Pos/Neg) and reading this creates a column
# <status>_<polarity>_Reading<k> holding the sum of the matching
# <polarity>_<party>_Reading<k> columns of the parties that form the status
# group in the bill's legislative period. Missing party counts are treated as
# 0 in the sum. Rows whose Legislative_period is missing or not 16, 17 or 18
# keep NA.

# Party labels making up each status group, per legislative period.
status_composition <- list(
  "16" = list(
    Government = c("CDU_CSU", "SPD"),
    Prime_Minister = "CDU_CSU",
    Junior_Coalition_Partner = "SPD",
    Cabinet = "Cabinet",
    Opposition = c("FDP", "GRUENE", "LINKE")
  ),
  "17" = list(
    Government = c("CDU_CSU", "FDP"),
    Prime_Minister = "CDU_CSU",
    Junior_Coalition_Partner = "FDP",
    Cabinet = "Cabinet",
    Opposition = c("SPD", "GRUENE", "LINKE")
  ),
  "18" = list(
    Government = c("CDU_CSU", "SPD"),
    Prime_Minister = "CDU_CSU",
    Junior_Coalition_Partner = "SPD",
    Cabinet = "Cabinet",
    # only Gruene and Linke are summed for this period (no FDP term)
    Opposition = c("GRUENE", "LINKE")
  )
)
status_order <- c(
  "Government", "Prime_Minister", "Junior_Coalition_Partner",
  "Cabinet", "Opposition"
)

# Create the target columns first, all Pos columns and then all Neg columns,
# so they appear in metadata_df in a fixed order.
for (polarity in c("Pos", "Neg")) {
  for (status in status_order) {
    for (reading_no in 1:3) {
      target_col <- paste0(status, "_", polarity, "_Reading", reading_no)
      metadata_df[[target_col]] <- rep(NA_real_, nrow(metadata_df))
    }
  }
}

# Fill the columns one legislative period at a time.
for (period in names(status_composition)) {
  # which() drops rows with a missing period instead of failing on them
  period_rows <- which(metadata_df$Legislative_period == as.numeric(period))
  for (status in status_order) {
    parties <- status_composition[[period]][[status]]
    for (polarity in c("Pos", "Neg")) {
      for (reading_no in 1:3) {
        source_cols <- paste0(polarity, "_", parties, "_Reading", reading_no)
        target_col <- paste0(status, "_", polarity, "_Reading", reading_no)
        metadata_df[[target_col]][period_rows] <- rowSums(
          metadata_df[period_rows, source_cols, drop = FALSE],
          na.rm = TRUE
        )
      }
    }
  }
}

#Calculate Sentiment on status level for each reading and overall
# For each status group:
#   <status>_Sentiment_Reading<k> = log((Pos_k + 0.5) / (Neg_k + 0.5))
#   <status>_Sentiment_Overall    = the same ratio on the counts summed over
#                                   the three readings
# A reading score is set to NA when its positive count is 0, whatever the
# negative count is. The overall score is set to NA when all three positive
# counts are 0.
for (status in c("Government", "Prime_Minister", "Junior_Coalition_Partner",
                 "Opposition", "Cabinet")) {
  pos_counts <- lapply(1:3, function(reading_no) {
    metadata_df[[paste0(status, "_Pos_Reading", reading_no)]]
  })
  neg_counts <- lapply(1:3, function(reading_no) {
    metadata_df[[paste0(status, "_Neg_Reading", reading_no)]]
  })

  for (reading_no in 1:3) {
    reading_score <- log(
      (pos_counts[[reading_no]] + 0.5) / (neg_counts[[reading_no]] + 0.5)
    )
    reading_score[pos_counts[[reading_no]] == 0] <- NA
    metadata_df[[paste0(status, "_Sentiment_Reading", reading_no)]] <-
      reading_score
  }

  overall_score <- log(
    (pos_counts[[1]] + pos_counts[[2]] + pos_counts[[3]] + 0.5) /
      (neg_counts[[1]] + neg_counts[[2]] + neg_counts[[3]] + 0.5)
  )
  overall_score[pos_counts[[1]] == 0 &
                  pos_counts[[2]] == 0 &
                  pos_counts[[3]] == 0] <- NA
  metadata_df[[paste0(status, "_Sentiment_Overall")]] <- overall_score
}

# Keep only bills that have an overall sentiment score for the government,
# the opposition, or both.
metadata_df_speeches <- metadata_df %>%
  filter(!(is.na(Government_Sentiment_Overall) &
             is.na(Opposition_Sentiment_Overall)))

# Conflict between two status groups is the absolute difference of their
# sentiment scores, computed overall and for each reading. The overall
# measure has no suffix; per-reading measures end in _Reading<k>.
conflict_pairs <- list(
  Gov_Oppo_Conflict = c("Government", "Opposition"),
  Gov_Cabinet_Conflict = c("Government", "Cabinet"),
  Prime_Minister_Junior_Coalition_Conflict =
    c("Prime_Minister", "Junior_Coalition_Partner")
)
for (conflict_name in names(conflict_pairs)) {
  status_pair <- conflict_pairs[[conflict_name]]
  for (scope in c("Overall", "Reading1", "Reading2", "Reading3")) {
    conflict_col <- if (scope == "Overall") {
      conflict_name
    } else {
      paste0(conflict_name, "_", scope)
    }
    first_score <-
      metadata_df_speeches[[paste0(status_pair[1], "_Sentiment_", scope)]]
    second_score <-
      metadata_df_speeches[[paste0(status_pair[2], "_Sentiment_", scope)]]
    metadata_df_speeches[[conflict_col]] <- abs(first_score - second_score)
  }
}

# Define policy area
# Foreign relations: 1 when the Sachgebiete entry contains any of the
# keywords below, 0 otherwise (missing entries count as 0).
metadata_df_speeches$main_policy_area_foreign_relations <- as.numeric(
  grepl(
    paste(c("internatio", "Verteidigung", "Entwicklung"), collapse = "|"),
    metadata_df_speeches$Sachgebiete
  )
)

# Domestic policy: 1 when the Sachgebiete entry contains any of the keywords
# below, 0 otherwise (missing entries count as 0).
metadata_df_speeches$main_policy_area_domestic <- as.numeric(
  grepl(
    paste(
      c(
        "Zuwanderung", "Wissenschaft", "Bildung", "Kultur", "Staat",
        "Verkehr", "Raumordnung", "Energie", "Umwelt", "Gesundheit",
        "Gesellschaftspolitik", "Landwirtschaft", "Bundestag", "Bundesl",
        "Medien", "Sport", "Recht", "Parteien", "Innere"
      ),
      collapse = "|"
    ),
    metadata_df_speeches$Sachgebiete
  )
)

# Economic policy: 1 when the Sachgebiete entry contains any of the keywords
# below, 0 otherwise (missing entries count as 0).
# Matching is by substring, so the lower-case "wirtschaft" pattern also hits
# compounds such as "Landwirtschaft"; the policy-area flags are therefore not
# mutually exclusive.
metadata_df_speeches$main_policy_area_economy <- as.numeric(
  grepl(
    paste(
      c("wirtschaft", "Finanzen", "Wirtschaft", "Sicherung", "Arbeit"),
      collapse = "|"
    ),
    metadata_df_speeches$Sachgebiete
  )
)

# European policy: 1 when the Sachgebiete entry contains "Europapolitik",
# 0 otherwise (missing entries count as 0).
metadata_df_speeches$main_policy_area_europe <- as.numeric(
  grepl("Europapolitik", metadata_df_speeches$Sachgebiete)
)

# Date of the last debate held on each bill: the third-debate date when it
# parses as dd.mm.yyyy, otherwise the second-debate date, otherwise the first.
# Entries that do not parse (e.g. "No 3rd debate") become NA and fall through
# to the next candidate.
metadata_df_speeches$Date_of_last_debate <- local({
  last_debate <- as.Date(metadata_df_speeches$Date_of_BT_debate3, "%d.%m.%Y")
  earlier_debates <- list(
    metadata_df_speeches$Date_of_BT_debate2,
    metadata_df_speeches$Date_of_BT_debate1
  )
  for (fallback in earlier_debates) {
    still_missing <- is.na(last_debate)
    last_debate[still_missing] <- as.Date(fallback[still_missing], "%d.%m.%Y")
  }
  last_debate
})

# Duration of the legislative process: days from the date of initiative to the
# last debate.
metadata_df_speeches$duration <- as.numeric(
  metadata_df_speeches$Date_of_last_debate -
    as.Date(metadata_df_speeches$Date_of_initiative, "%d.%m.%Y")
)

# Only keep speeches connected to bills introduced by the government.
# `%in% 1` is used instead of `== 1` because a logical row index containing NA
# makes `[` return an all-NA row for every NA flag; `%in%` never yields NA, so
# speeches with a missing Regierungsvorlage flag are simply dropped.
senti_germany_debates <- senti_germany_debates[
  senti_germany_debates$Regierungsvorlage %in% 1,
]
# Only keep speaker data on bills that were debated and introduced by the
# government.
metadata_speakers_complete_df <- metadata_speakers_complete_df[
  metadata_speakers_complete_df$Drucksachennummer %in%
    senti_germany_debates$Drucksachennummer.x,
]
# Only keep bill metadata on bills that were debated and introduced by the
# government.
metadata_df_speeches <- metadata_df_speeches[
  metadata_df_speeches$Drucksachennummer %in%
    senti_germany_debates$Drucksachennummer.x,
]
# Date of the debate in which each speech was given, taken from the
# debate-date column that matches the speech's Reading (1, 2 or 3).
# Every row starts with the placeholder date 01.01.1990; rows whose Reading is
# missing or not 1/2/3 keep that placeholder.
# The assignment is vectorised: the former `for (i in 1:nrow(...))` loop
# iterated over c(1, 0) on an empty data frame and stopped with an error as
# soon as a Reading was NA.
senti_germany_debates$Date_of_thisspeech <- rep(
  as.Date("01.01.1990", "%d.%m.%Y"),
  nrow(senti_germany_debates)
)

# which() drops NA comparisons, so a missing Reading never reaches the
# subscripted assignment.
rows_reading1 <- which(senti_germany_debates$Reading == 1)
senti_germany_debates$Date_of_thisspeech[rows_reading1] <- as.Date(
  senti_germany_debates$Date_of_BT_debate1[rows_reading1], "%d.%m.%Y"
)

rows_reading2 <- which(senti_germany_debates$Reading == 2)
senti_germany_debates$Date_of_thisspeech[rows_reading2] <- as.Date(
  senti_germany_debates$Date_of_BT_debate2[rows_reading2], "%d.%m.%Y"
)

rows_reading3 <- which(senti_germany_debates$Reading == 3)
senti_germany_debates$Date_of_thisspeech[rows_reading3] <- as.Date(
  senti_germany_debates$Date_of_BT_debate3[rows_reading3], "%d.%m.%Y"
)

# Make the placeholder visible instead of letting it pass silently.
rows_without_reading <- !(senti_germany_debates$Reading %in% 1:3)
if (any(rows_without_reading)) {
  warning(
    sum(rows_without_reading),
    " speech(es) have a Reading other than 1, 2 or 3 and keep the",
    " placeholder date 01.01.1990",
    call. = FALSE
  )
}

# Complement of the economy flag: 1 for bills not flagged as economic policy.
metadata_df_speeches$non_economic <-
  1 - metadata_df_speeches$main_policy_area_economy

# Unanimity at the last reading of each bill:
#   * the third-reading value when Date_of_BT_debate3 is not "No 3rd debate";
#   * otherwise (still NA) the second-reading value when Date_of_BT_debate2 is
#     not "No 2nd debate";
#   * 0 for everything that is still NA afterwards.
metadata_df_speeches$unanimity_last_reading <- local({
  last_reading <- rep(NA, nrow(metadata_df_speeches))

  had_third <- metadata_df_speeches$Date_of_BT_debate3 != "No 3rd debate"
  last_reading[had_third] <-
    metadata_df_speeches$unanimity_third_reading[had_third]

  use_second <- metadata_df_speeches$Date_of_BT_debate2 != "No 2nd debate" &
    is.na(last_reading)
  last_reading[use_second] <-
    metadata_df_speeches$unanimity_second_reading[use_second]

  last_reading[is.na(last_reading)] <- 0
  last_reading
})
table(metadata_df_speeches$unanimity_last_reading)
table(metadata_df_speeches$unanimity_any_reading)

# Label each bill by initiator: "cabinet_bill" when the initiator is the
# Bundesregierung, "governing_party_bill" otherwise.
metadata_df_speeches$Initiator2 <- ifelse(
  metadata_df_speeches$Initiator == "Bundesregierung",
  "cabinet_bill",
  "governing_party_bill"
)

# Only keep bills in which everyone spoke, i.e. bills with a non-missing
# overall sentiment value for the prime minister, the junior coalition partner
# and the opposition.
metadata_df_speeches <- metadata_df_speeches %>%
  filter(
    !is.na(Prime_Minister_Sentiment_Overall),
    !is.na(Junior_Coalition_Partner_Sentiment_Overall),
    !is.na(Opposition_Sentiment_Overall)
  )

# Store both processed data frames together in one .RData file in the working
# directory.
save(
  list = c("senti_germany_debates", "metadata_df_speeches"),
  file = "4_germany_datasets.RData"
)
